import os
import hashlib
from Bio.SeqUtils.CheckSum import seguid
from Bio import SeqIO


# Collect the unique sequences found in every FASTA (".fa") file in
# `directory`.
directory = "."
filenames = os.listdir(directory)
n = 0  # number of FASTA files read so far
sequences = set()  # a set, so identical sequences are stored only once
# NOTE(review): every file ending in ".fa" is read, which includes a
# "seqlist.fa" left in this directory by a previous run of this script --
# confirm that re-reading old output is intended.
for filename in filenames:
    extension = os.path.splitext(filename)[1]
    if extension != ".fa":
        continue
    print("Reading", filename)
    # os.listdir() returns bare file names, so join them with the directory;
    # otherwise the open() only works when `directory` is the current one.
    path = os.path.join(directory, filename)
    # The with-block closes the file even if parsing raises part-way through.
    with open(path) as handle:
        for record in SeqIO.parse(handle, "fasta"):
            sequences.add(str(record.seq))
    n += 1
    print("%d files read; %d unique sequences" % (n, len(sequences)))

# Iterating over a set will not return the sequences in the order in which
# they were added to the set; the order will even change between Python runs
# because the hash algorithm changes. Sort the sequences here to guarantee a
# consistent result between runs. Use seguid as the key to ensure that similar
# sequences end up in different locations in the seqlist to balance the running
# time of the alignment scripts.
sequences = sorted(sequences, key=seguid)


# Write the sorted sequences as FASTA records named by their zero-padded
# position in the sorted list (">seq_00000000", ">seq_00000001", ...).
filename = "seqlist.fa"
print("Writing", filename)
# The with-block flushes and closes the output file even if a write fails.
with open(filename, 'w') as handle:
    for i, sequence in enumerate(sequences):
        handle.write(">seq_%08d\n" % i)
        handle.write("%s\n" % sequence)
